﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using LDA;

namespace Gutenberg
{
    /// <summary>
    /// Command-line driver: reads a vocabulary file and a set of per-document
    /// index files, runs <c>LDAGibbs</c> over them, and writes the top words of
    /// each topic plus the per-document topic weights to a results file.
    /// </summary>
    class Program
    {
        // Upper bound on how many ranked words are written for each topic.
        private const int TopWordsPerTopic = 100;

        /// <summary>
        /// Program entry point.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> must be the number of topics, a positive integer.
        /// When it is missing or invalid a usage message is written to standard
        /// error, the process exit code is set to 1 and nothing else is done.
        /// </param>
        static void Main(string[] args)
        {
            int K; //number of topics
            if (args.Length < 1
                || !int.TryParse(args[0],
                                 System.Globalization.NumberStyles.Integer,
                                 System.Globalization.CultureInfo.InvariantCulture,
                                 out K)
                || K <= 0)
            {
                Console.Error.WriteLine("Usage: <program> <number of topics (positive integer)>");
                Environment.ExitCode = 1;
                return;
            }

            string textPath = @"..\..\..\..\Text Corpus\";
            // Path.Combine avoids the doubled separator that plain concatenation
            // produced, since textPath already ends with a backslash.
            string indexPath = Path.Combine(textPath, "Index");

            // The using statement guarantees the results file is flushed and
            // closed even when reading the corpus or running the sampler throws.
            using (StreamWriter resultFile = new StreamWriter("results." + K + ".topics.txt"))
            {
                DirectoryInfo di = new DirectoryInfo(indexPath);
                FileInfo[] rgFiles = di.GetFiles("*.index.txt");

                string[] vocabArray = ReadVocabulary(Path.Combine(indexPath, "vocab.txt"));

                int M = rgFiles.Length;
                int[][] W = new int[M][];
                for (int m = 0; m < M; m++)
                {
                    W[m] = ReadDocument(rgFiles[m].FullName);
                }

                // NOTE(review): the literals 1, 1 and 1000 are passed through
                // unchanged; their meaning is defined by LDAGibbs — presumably
                // the two prior hyperparameters and an iteration count. Confirm
                // against the LDAGibbs constructor.
                LDAGibbs lda = new LDAGibbs(1, 1, K, W, vocabArray, 1000);
                lda.MCMC();

                // NOTE(review): phi is not used below. The read is kept because
                // this file cannot show whether the Phi getter has side effects
                // that Theta or TopicWords rely on — confirm before removing.
                double[,] phi = lda.Phi;
                double[,] theta = lda.Theta;
                Result[][] topicWords = lda.TopicWords;

                WriteResults(resultFile, K, rgFiles, topicWords, theta);
            }

            //Console.WriteLine("Press any key");
            //Console.ReadKey();
        }

        /// <summary>
        /// Reads the vocabulary file: one entry per non-blank line, with the
        /// word taken from the second tab-separated field (trimmed).
        /// </summary>
        /// <param name="path">Path of the vocabulary file.</param>
        /// <returns>The words in file order.</returns>
        /// <exception cref="FormatException">
        /// A non-blank line has no second tab-separated field.
        /// </exception>
        private static string[] ReadVocabulary(string path)
        {
            List<string> words = new List<string>();
            using (StreamReader reader = new StreamReader(path))
            {
                int lineNumber = 0;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    lineNumber++;
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }

                    string[] tokens = line.Split('\t');
                    if (tokens.Length < 2)
                    {
                        // Fail loudly instead of skipping the line, so that the
                        // positions of later words in the returned array are
                        // never silently shifted.
                        throw new FormatException(string.Format(
                            "{0}({1}): expected at least two tab-separated fields.",
                            path, lineNumber));
                    }
                    words.Add(tokens[1].Trim());
                }
            }
            return words.ToArray();
        }

        /// <summary>
        /// Reads one document index file: one entry per non-blank line, with the
        /// integer taken from the first tab-separated field.
        /// </summary>
        /// <param name="path">Path of the document index file.</param>
        /// <returns>The integers in file order.</returns>
        /// <exception cref="FormatException">
        /// The first field of a non-blank line is not a valid integer.
        /// </exception>
        private static int[] ReadDocument(string path)
        {
            List<int> ids = new List<int>();
            using (StreamReader reader = new StreamReader(path))
            {
                int lineNumber = 0;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    lineNumber++;
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }

                    string[] tokens = line.Split('\t');
                    int id;
                    if (!int.TryParse(tokens[0],
                                      System.Globalization.NumberStyles.Integer,
                                      System.Globalization.CultureInfo.InvariantCulture,
                                      out id))
                    {
                        throw new FormatException(string.Format(
                            "{0}({1}): '{2}' is not a valid integer.",
                            path, lineNumber, tokens[0]));
                    }
                    ids.Add(id);
                }
            }
            return ids.ToArray();
        }

        /// <summary>
        /// Writes the report: for each topic a "Topic k" header followed by up
        /// to <see cref="TopWordsPerTopic"/> rank/word/probability rows, then
        /// one row per document holding the file name and its K theta values.
        /// </summary>
        /// <param name="resultFile">Destination writer (not closed here).</param>
        /// <param name="K">Number of topics.</param>
        /// <param name="rgFiles">Document files, in the same order as the rows of <paramref name="theta"/>.</param>
        /// <param name="topicWords">Per-topic word lists as returned by <c>LDAGibbs.TopicWords</c>.</param>
        /// <param name="theta">Matrix indexed as [document, topic].</param>
        private static void WriteResults(TextWriter resultFile, int K, FileInfo[] rgFiles,
                                         Result[][] topicWords, double[,] theta)
        {
            for (int k = 0; k < K; k++)
            {
                resultFile.WriteLine("Topic {0}", k);

                // Clamp to the words actually available so a topic with fewer
                // than TopWordsPerTopic entries does not overrun the array.
                int count = Math.Min(TopWordsPerTopic, topicWords[k].Length);
                for (int v = 0; v < count; v++)
                {
                    resultFile.WriteLine("{0}\t{1}\t{2}", v + 1, topicWords[k][v].Word, topicWords[k][v].Prob);
                }
                resultFile.WriteLine();
            }

            for (int m = 0; m < rgFiles.Length; m++)
            {
                resultFile.Write("{0}", rgFiles[m].Name);
                for (int k = 0; k < K; k++)
                {
                    resultFile.Write("\t{0}", theta[m, k]);
                }
                resultFile.WriteLine();
            }
        }
    }
}
